{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CHAPTER 6 - PANDAS IN DEPTH - DATA MANIPULATION"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ball</td>\n",
       "      <td>12.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pencil</td>\n",
       "      <td>11.44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pen</td>\n",
       "      <td>33.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>mug</td>\n",
       "      <td>13.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ashtray</td>\n",
       "      <td>33.62</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        id  price\n",
       "0     ball  12.33\n",
       "1   pencil  11.44\n",
       "2      pen  33.21\n",
       "3      mug  13.23\n",
       "4  ashtray  33.62"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1 = pd.DataFrame( {'id':['ball','pencil','pen','mug','ashtray'],\n",
    "                        'price': [12.33,11.44,33.21,13.23,33.62]})\n",
    "frame1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>red</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      id\n",
       "0  white  pencil\n",
       "1    red  pencil\n",
       "2    red    ball\n",
       "3  black     pen"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2 = pd.DataFrame( {'id': ['pencil','pencil','ball','pen'],\n",
    "                        'color':['white','red','red','black']})\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>price</th>\n",
       "      <th>color</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ball</td>\n",
       "      <td>12.33</td>\n",
       "      <td>red</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pencil</td>\n",
       "      <td>11.44</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pencil</td>\n",
       "      <td>11.44</td>\n",
       "      <td>red</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>pen</td>\n",
       "      <td>33.21</td>\n",
       "      <td>black</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id  price  color\n",
       "0    ball  12.33    red\n",
       "1  pencil  11.44  white\n",
       "2  pencil  11.44    red\n",
       "3     pen  33.21  black"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PPOD</td>\n",
       "      <td>green</td>\n",
       "      <td>ashtray</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand  color       id\n",
       "0   OMG  white     ball\n",
       "1   ABC    red   pencil\n",
       "2   ABC    red      pen\n",
       "3   POD  black     ,mug\n",
       "4  PPOD  green  ashtray"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1 = pd.DataFrame( {'id': ['ball','pencil','pen',',mug','ashtray'],\n",
    "                        'color': ['white','red','red','black','green'],\n",
    "                        'brand': ['OMG','ABC','ABC','POD','PPOD']})\n",
    "frame1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>POD</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand      id\n",
       "0   OMG  pencil\n",
       "1   POD  pencil\n",
       "2   ABC    ball\n",
       "3   POD     pen"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2 = pd.DataFrame( {'id':['pencil','pencil','ball','pen'],\n",
    "                        'brand': ['OMG','POD','ABC','POD']})\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [brand, color, id]\n",
       "Index: []"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color      id brand_y\n",
       "0     OMG  white    ball     ABC\n",
       "1     ABC    red  pencil     OMG\n",
       "2     ABC    red  pencil     POD\n",
       "3     ABC    red     pen     POD"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1, frame2, on='id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>color</th>\n",
       "      <th>id_x</th>\n",
       "      <th>id_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand  color    id_x    id_y\n",
       "0   OMG  white    ball  pencil\n",
       "1   ABC    red  pencil    ball\n",
       "2   ABC    red     pen    ball\n",
       "3   POD  black    ,mug  pencil\n",
       "4   POD  black    ,mug     pen"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1, frame2, on='brand')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>sid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>POD</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand     sid\n",
       "0   OMG  pencil\n",
       "1   POD  pencil\n",
       "2   ABC    ball\n",
       "3   POD     pen"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2.columns = ['brand','sid']\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "      <th>sid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color      id brand_y     sid\n",
       "0     OMG  white    ball     ABC    ball\n",
       "1     ABC    red  pencil     OMG  pencil\n",
       "2     ABC    red  pencil     POD  pencil\n",
       "3     ABC    red     pen     POD     pen"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1, frame2, left_on='id', right_on='sid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color      id brand_y\n",
       "0     OMG  white    ball     ABC\n",
       "1     ABC    red  pencil     OMG\n",
       "2     ABC    red  pencil     POD\n",
       "3     ABC    red     pen     POD"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2.columns = ['brand','id']\n",
    "pd.merge(frame1,frame2,on='id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PPOD</td>\n",
       "      <td>green</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color       id brand_y\n",
       "0     OMG  white     ball     ABC\n",
       "1     ABC    red   pencil     OMG\n",
       "2     ABC    red   pencil     POD\n",
       "3     ABC    red      pen     POD\n",
       "4     POD  black     ,mug     NaN\n",
       "5    PPOD  green  ashtray     NaN"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2,on='id',how='outer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PPOD</td>\n",
       "      <td>green</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color       id brand_y\n",
       "0     OMG  white     ball     ABC\n",
       "1     ABC    red   pencil     OMG\n",
       "2     ABC    red   pencil     POD\n",
       "3     ABC    red      pen     POD\n",
       "4     POD  black     ,mug     NaN\n",
       "5    PPOD  green  ashtray     NaN"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2,on='id',how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "      <th>brand_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>ABC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>OMG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>POD</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color      id brand_y\n",
       "0     OMG  white    ball     ABC\n",
       "1     ABC    red  pencil     OMG\n",
       "2     ABC    red  pencil     POD\n",
       "3     ABC    red     pen     POD"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2,on='id',how='right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>color</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PPOD</td>\n",
       "      <td>green</td>\n",
       "      <td>ashtray</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>OMG</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>POD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>ABC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>POD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand  color       id\n",
       "0   OMG  white     ball\n",
       "1   ABC    red   pencil\n",
       "2   ABC    red      pen\n",
       "3   POD  black     ,mug\n",
       "4  PPOD  green  ashtray\n",
       "5   OMG    NaN   pencil\n",
       "6   POD    NaN   pencil\n",
       "7   ABC    NaN     ball\n",
       "8   POD    NaN      pen"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2,on=['id','brand'], how='outer')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merging on Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_x</th>\n",
       "      <th>color</th>\n",
       "      <th>id_x</th>\n",
       "      <th>brand_y</th>\n",
       "      <th>id_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>OMG</td>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>OMG</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>POD</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ABC</td>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>ABC</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>POD</td>\n",
       "      <td>black</td>\n",
       "      <td>,mug</td>\n",
       "      <td>POD</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  brand_x  color    id_x brand_y    id_y\n",
       "0     OMG  white    ball     OMG  pencil\n",
       "1     ABC    red  pencil     POD  pencil\n",
       "2     ABC    red     pen     ABC    ball\n",
       "3     POD  black    ,mug     POD     pen"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame1,frame2,right_index=True, left_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "columns overlap but no suffix specified: Index(['brand', 'id'], dtype='object')",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-18-a836bc147511>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mframe2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mjoin\u001b[1;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[0;32m   5314\u001b[0m         \u001b[1;31m# For SparseDataFrame's benefit\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5315\u001b[0m         return self._join_compat(other, on=on, how=how, lsuffix=lsuffix,\n\u001b[1;32m-> 5316\u001b[1;33m                                  rsuffix=rsuffix, sort=sort)\n\u001b[0m\u001b[0;32m   5317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5318\u001b[0m     def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_join_compat\u001b[1;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[0;32m   5329\u001b[0m             return merge(self, other, left_on=on, how=how,\n\u001b[0;32m   5330\u001b[0m                          \u001b[0mleft_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mon\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mright_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5331\u001b[1;33m                          suffixes=(lsuffix, rsuffix), sort=sort)\n\u001b[0m\u001b[0;32m   5332\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5333\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mon\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36mmerge\u001b[1;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[0;32m     56\u001b[0m                          \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindicator\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     57\u001b[0m                          validate=validate)\n\u001b[1;32m---> 58\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    586\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    587\u001b[0m         llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf,\n\u001b[1;32m--> 588\u001b[1;33m                                                      rdata.items, rsuf)\n\u001b[0m\u001b[0;32m    589\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    590\u001b[0m         \u001b[0mlindexers\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mleft_indexer\u001b[0m\u001b[1;33m}\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mleft_indexer\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mitems_overlap_with_suffix\u001b[1;34m(left, lsuffix, right, rsuffix)\u001b[0m\n\u001b[0;32m   5024\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlsuffix\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5025\u001b[0m             raise ValueError('columns overlap but no suffix specified: %s' %\n\u001b[1;32m-> 5026\u001b[1;33m                              to_rename)\n\u001b[0m\u001b[0;32m   5027\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5028\u001b[0m         \u001b[1;32mdef\u001b[0m \u001b[0mlrenamer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: columns overlap but no suffix specified: Index(['brand', 'id'], dtype='object')"
     ]
    }
   ],
   "source": [
    "frame1.join(frame2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2.columns = ['brand2','id2']\n",
    "frame1.join(frame2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concatenating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "array1 = np.arange(9).reshape((3,3))\n",
    "array1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "array2 = np.arange(9).reshape((3,3))+6\n",
    "array2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.concatenate([array1,array2],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'array1' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-19-268b4623f406>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0marray1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0marray2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m: name 'array1' is not defined"
     ]
    }
   ],
   "source": [
    "np.concatenate([array1,array2],axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.953608\n",
       "2    0.929539\n",
       "3    0.036994\n",
       "4    0.010650\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser1 = pd.Series(np.random.rand(4), index=[1,2,3,4])\n",
    "ser1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5    0.200771\n",
       "6    0.709060\n",
       "7    0.813766\n",
       "8    0.218998\n",
       "dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser2 = pd.Series(np.random.rand(4), index=[5,6,7,8])\n",
    "ser2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.953608\n",
       "2    0.929539\n",
       "3    0.036994\n",
       "4    0.010650\n",
       "5    0.200771\n",
       "6    0.709060\n",
       "7    0.813766\n",
       "8    0.218998\n",
       "dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([ser1,ser2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.953608</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.929539</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.036994</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.010650</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.200771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.709060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.813766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.218998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1\n",
       "1  0.953608       NaN\n",
       "2  0.929539       NaN\n",
       "3  0.036994       NaN\n",
       "4  0.010650       NaN\n",
       "5       NaN  0.200771\n",
       "6       NaN  0.709060\n",
       "7       NaN  0.813766\n",
       "8       NaN  0.218998"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([ser1,ser2], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [0, 1]\n",
       "Index: []"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([ser1,ser2], axis=1, join='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1  1    0.953608\n",
       "   2    0.929539\n",
       "   3    0.036994\n",
       "   4    0.010650\n",
       "2  5    0.200771\n",
       "   6    0.709060\n",
       "   7    0.813766\n",
       "   8    0.218998\n",
       "dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([ser1,ser2], keys=[1,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.953608</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.929539</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.036994</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.010650</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.200771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.709060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.813766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.218998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          1         2\n",
       "1  0.953608       NaN\n",
       "2  0.929539       NaN\n",
       "3  0.036994       NaN\n",
       "4  0.010650       NaN\n",
       "5       NaN  0.200771\n",
       "6       NaN  0.709060\n",
       "7       NaN  0.813766\n",
       "8       NaN  0.218998"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([ser1,ser2], axis=1, keys=[1,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.231057</td>\n",
       "      <td>0.024329</td>\n",
       "      <td>0.843888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.727480</td>\n",
       "      <td>0.296619</td>\n",
       "      <td>0.367309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.282516</td>\n",
       "      <td>0.524227</td>\n",
       "      <td>0.462000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.078044</td>\n",
       "      <td>0.751505</td>\n",
       "      <td>0.832853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.843225</td>\n",
       "      <td>0.945914</td>\n",
       "      <td>0.141331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.189217</td>\n",
       "      <td>0.799631</td>\n",
       "      <td>0.308749</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C\n",
       "1  0.231057  0.024329  0.843888\n",
       "2  0.727480  0.296619  0.367309\n",
       "3  0.282516  0.524227  0.462000\n",
       "4  0.078044  0.751505  0.832853\n",
       "5  0.843225  0.945914  0.141331\n",
       "6  0.189217  0.799631  0.308749"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1 = pd.DataFrame(np.random.rand(9).reshape(3,3), index=[1,2,3], columns=['A','B','C'])\n",
    "frame2 = pd.DataFrame(np.random.rand(9).reshape(3,3), index=[4,5,6], columns=['A','B','C'])\n",
    "pd.concat([frame1, frame2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.231057</td>\n",
       "      <td>0.024329</td>\n",
       "      <td>0.843888</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.727480</td>\n",
       "      <td>0.296619</td>\n",
       "      <td>0.367309</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.282516</td>\n",
       "      <td>0.524227</td>\n",
       "      <td>0.462000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.078044</td>\n",
       "      <td>0.751505</td>\n",
       "      <td>0.832853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.843225</td>\n",
       "      <td>0.945914</td>\n",
       "      <td>0.141331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.189217</td>\n",
       "      <td>0.799631</td>\n",
       "      <td>0.308749</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         A         B         C\n",
       "1  0.231057  0.024329  0.843888       NaN       NaN       NaN\n",
       "2  0.727480  0.296619  0.367309       NaN       NaN       NaN\n",
       "3  0.282516  0.524227  0.462000       NaN       NaN       NaN\n",
       "4       NaN       NaN       NaN  0.078044  0.751505  0.832853\n",
       "5       NaN       NaN       NaN  0.843225  0.945914  0.141331\n",
       "6       NaN       NaN       NaN  0.189217  0.799631  0.308749"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([frame1, frame2], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Combining"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.075815\n",
       "2    0.332282\n",
       "3    0.884463\n",
       "4    0.518336\n",
       "5    0.089025\n",
       "dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser1 = pd.Series(np.random.rand(5), index=[1,2,3,4,5])\n",
    "ser1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    0.315847\n",
       "4    0.275937\n",
       "5    0.352538\n",
       "6    0.865549\n",
       "dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser2 = pd.Series(np.random.rand(4), index=[2,4,5,6])\n",
    "ser2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.075815\n",
       "2    0.332282\n",
       "3    0.884463\n",
       "4    0.518336\n",
       "5    0.089025\n",
       "6    0.865549\n",
       "dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser1.combine_first(ser2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.075815\n",
       "2    0.315847\n",
       "3    0.884463\n",
       "4    0.275937\n",
       "5    0.352538\n",
       "6    0.865549\n",
       "dtype: float64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser2.combine_first(ser1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    0.075815\n",
       "2    0.332282\n",
       "3    0.884463\n",
       "4    0.275937\n",
       "5    0.352538\n",
       "dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser1[:3].combine_first(ser2[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pivoting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pivoting with Hierarchical Indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ball</th>\n",
       "      <th>pen</th>\n",
       "      <th>pencil</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       ball  pen  pencil\n",
       "white     0    1       2\n",
       "black     3    4       5\n",
       "red       6    7       8"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1 = pd.DataFrame(np.arange(9).reshape(3,3),\n",
    "                     index=['white','black','red'],\n",
    "                     columns=['ball','pen','pencil'])\n",
    "frame1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "white  ball      0\n",
       "       pen       1\n",
       "       pencil    2\n",
       "black  ball      3\n",
       "       pen       4\n",
       "       pencil    5\n",
       "red    ball      6\n",
       "       pen       7\n",
       "       pencil    8\n",
       "dtype: int32"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser = frame1.stack()\n",
    "ser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ball</th>\n",
       "      <th>pen</th>\n",
       "      <th>pencil</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       ball  pen  pencil\n",
       "white     0    1       2\n",
       "black     3    4       5\n",
       "red       6    7       8"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>white</th>\n",
       "      <th>black</th>\n",
       "      <th>red</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ball</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pen</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pencil</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        white  black  red\n",
       "ball        0      3    6\n",
       "pen         1      4    7\n",
       "pencil      2      5    8"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser.unstack(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pivoting from \"Long\" to \"Wide\" Format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>0.896313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>white</td>\n",
       "      <td>pen</td>\n",
       "      <td>0.344864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>white</td>\n",
       "      <td>mug</td>\n",
       "      <td>0.101891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>ball</td>\n",
       "      <td>0.697267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>red</td>\n",
       "      <td>pen</td>\n",
       "      <td>0.852835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>0.145385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>black</td>\n",
       "      <td>ball</td>\n",
       "      <td>0.738799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>black</td>\n",
       "      <td>pen</td>\n",
       "      <td>0.783870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>black</td>\n",
       "      <td>mug</td>\n",
       "      <td>0.017153</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  item     value\n",
       "0  white  ball  0.896313\n",
       "1  white   pen  0.344864\n",
       "2  white   mug  0.101891\n",
       "3    red  ball  0.697267\n",
       "4    red   pen  0.852835\n",
       "5    red   mug  0.145385\n",
       "6  black  ball  0.738799\n",
       "7  black   pen  0.783870\n",
       "8  black   mug  0.017153"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "longframe = pd.DataFrame({ 'color':['white','white','white',\n",
    "                                    'red','red','red',\n",
    "                                    'black','black','black'],\n",
    "                           'item':['ball','pen','mug',\n",
    "                                   'ball','pen','mug',\n",
    "                                   'ball','pen','mug'],\n",
    "                           'value': np.random.rand(9)})\n",
    "longframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">value</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item</th>\n",
       "      <th>ball</th>\n",
       "      <th>mug</th>\n",
       "      <th>pen</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>0.738799</td>\n",
       "      <td>0.017153</td>\n",
       "      <td>0.783870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>0.697267</td>\n",
       "      <td>0.145385</td>\n",
       "      <td>0.852835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>0.896313</td>\n",
       "      <td>0.101891</td>\n",
       "      <td>0.344864</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          value                    \n",
       "item       ball       mug       pen\n",
       "color                              \n",
       "black  0.738799  0.017153  0.783870\n",
       "red    0.697267  0.145385  0.852835\n",
       "white  0.896313  0.101891  0.344864"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wideframe = longframe.pivot('color','item')\n",
    "wideframe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Removing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ball</th>\n",
       "      <th>pen</th>\n",
       "      <th>pencil</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       ball  pen  pencil\n",
       "white     0    1       2\n",
       "black     3    4       5\n",
       "red       6    7       8"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1 = pd.DataFrame(np.arange(9).reshape(3,3),\n",
    "                       index=['white','black','red'],\n",
    "                       columns=['ball','pen','pencil'])\n",
    "frame1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pen</th>\n",
       "      <th>pencil</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       pen  pencil\n",
       "white    1       2\n",
       "black    4       5\n",
       "red      7       8"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "del frame1['ball']\n",
    "frame1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pen</th>\n",
       "      <th>pencil</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>black</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       pen  pencil\n",
       "black    4       5\n",
       "red      7       8"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame1.drop('white')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Transformation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Removing Duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>white</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>red</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>white</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  value\n",
       "0  white      2\n",
       "1  white      1\n",
       "2    red      3\n",
       "3    red      3\n",
       "4  white      2"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dframe = pd.DataFrame({ 'color': ['white','white','red','red','white'],\n",
    "                        'value': [2,1,3,3,2]})\n",
    "dframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1    False\n",
       "2    False\n",
       "3     True\n",
       "4     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dframe.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>white</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  value\n",
       "3    red      3\n",
       "4  white      2"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dframe[dframe.duplicated()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Replacing Values via Mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>rosso</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>verde</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color     item  price\n",
       "0   white     ball   5.56\n",
       "1   rosso      mug   4.20\n",
       "2   verde      pen   1.30\n",
       "3   black   pencil   0.56\n",
       "4  yellow  ashtray   2.75"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({ 'item':['ball','mug','pen','pencil','ashtray'],\n",
    "                       'color':['white','rosso','verde','black','yellow'],\n",
    "                       'price':[5.56,4.20,1.30,0.56,2.75]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "newcolors = {\n",
    "    'rosso': 'red',\n",
    "    'verde': 'green'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color     item  price\n",
       "0   white     ball   5.56\n",
       "1     red      mug   4.20\n",
       "2   green      pen   1.30\n",
       "3   black   pencil   0.56\n",
       "4  yellow  ashtray   2.75"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.replace(newcolors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    3.0\n",
       "2    NaN\n",
       "3    4.0\n",
       "4    6.0\n",
       "5    NaN\n",
       "6    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser = pd.Series([1,3,np.nan,4,6,np.nan,3])\n",
    "ser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    3.0\n",
       "2    0.0\n",
       "3    4.0\n",
       "4    6.0\n",
       "5    0.0\n",
       "6    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser.replace(np.nan,0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Adding Values via Mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color     item\n",
       "0   white     ball\n",
       "1     red      mug\n",
       "2   green      pen\n",
       "3   black   pencil\n",
       "4  yellow  ashtray"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({'item':['ball','mug','pen','pencil','ashtray'],\n",
    "                      'color':['white','red','green','black','yellow']})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "price = {\n",
    "    'ball': 5.56,\n",
    "    'mug': 4.20,\n",
    "    'bottle': 1.30,\n",
    "    'scissors': 3.41,\n",
    "    'pen': 1.30,\n",
    "    'pencil': 0.56,\n",
    "    'ashtray': 2.75\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color     item  price\n",
       "0   white     ball   5.56\n",
       "1     red      mug   4.20\n",
       "2   green      pen   1.30\n",
       "3   black   pencil   0.56\n",
       "4  yellow  ashtray   2.75"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame['price'] = frame['item'].map(price)\n",
    "frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Rename the Indexes of the Axes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color     item  price\n",
       "0   white     ball   5.56\n",
       "1     red      mug   4.20\n",
       "2   green      pen   1.30\n",
       "3   black   pencil   0.56\n",
       "4  yellow  ashtray   2.75"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>item</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fifth</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         color     item  price\n",
       "first    white     ball   5.56\n",
       "second     red      mug   4.20\n",
       "third    green      pen   1.30\n",
       "fourth   black   pencil   0.56\n",
       "fifth   yellow  ashtray   2.75"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reindex = {\n",
    "    0: 'first',\n",
    "    1: 'second',\n",
    "    2: 'third',\n",
    "    3: 'fourth',\n",
    "    4: 'fifth'\n",
    "}\n",
    "frame.rename(reindex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>object</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fifth</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         color   object  price\n",
       "first    white     ball   5.56\n",
       "second     red      mug   4.20\n",
       "third    green      pen   1.30\n",
       "fourth   black   pencil   0.56\n",
       "fifth   yellow  ashtray   2.75"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recolumn = {\n",
    "    'item': 'object',\n",
    "    'prince': 'value'\n",
    "}\n",
    "frame.rename(index=reindex, columns=recolumn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>object</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        color   object  price\n",
       "0       white     ball   5.56\n",
       "first     red      mug   4.20\n",
       "2       green      pen   1.30\n",
       "3       black   pencil   0.56\n",
       "4      yellow  ashtray   2.75"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.rename(index={1:'first'}, columns={'item':'object'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "frame.rename(columns={'item':'object'}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>object</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>ball</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>mug</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>1.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>black</td>\n",
       "      <td>pencil</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>yellow</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    color   object  price\n",
       "0   white     ball   5.56\n",
       "1     red      mug   4.20\n",
       "2   green      pen   1.30\n",
       "3   black   pencil   0.56\n",
       "4  yellow  ashtray   2.75"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Discretization and Binning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0, 25], (25, 50], (50, 75], (50, 75], (25, 50], ..., (75, 100], (0, 25], (25, 50], (75, 100], (75, 100]]\n",
       "Length: 17\n",
       "Categories (4, interval[int64]): [(0, 25] < (25, 50] < (50, 75] < (75, 100]]"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = [12,34,67,55,28,90,99,12,3,56,74,44,87,23,49,89,87]\n",
    "bins = [0,25,50,75,100]\n",
    "cat = pd.cut(results, bins)\n",
    "cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "IntervalIndex([(0, 25], (25, 50], (50, 75], (75, 100]]\n",
       "              closed='right',\n",
       "              dtype='interval[int64]')"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cat.categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 2, 1, 3, 3, 0, 0, 2, 2, 1, 3, 0, 1, 3, 3], dtype=int8)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cat.codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(75, 100]    5\n",
       "(50, 75]     4\n",
       "(25, 50]     4\n",
       "(0, 25]      4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.value_counts(cat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[unlikely, less likely, likely, likely, less likely, ..., highly likely, unlikely, less likely, highly likely, highly likely]\n",
       "Length: 17\n",
       "Categories (4, object): [unlikely < less likely < likely < highly likely]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bin_names = ['unlikely','less likely','likely','highly likely']\n",
    "pd.cut(results, bins, labels=bin_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2.904, 22.2], (22.2, 41.4], (60.6, 79.8], (41.4, 60.6], (22.2, 41.4], ..., (79.8, 99.0], (22.2, 41.4], (41.4, 60.6], (79.8, 99.0], (79.8, 99.0]]\n",
       "Length: 17\n",
       "Categories (5, interval[float64]): [(2.904, 22.2] < (22.2, 41.4] < (41.4, 60.6] < (60.6, 79.8] < (79.8, 99.0]]"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(results, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2.999, 24.0], (24.0, 46.0], (62.6, 87.0], (46.0, 62.6], (24.0, 46.0], ..., (62.6, 87.0], (2.999, 24.0], (46.0, 62.6], (87.0, 99.0], (62.6, 87.0]]\n",
       "Length: 17\n",
       "Categories (5, interval[float64]): [(2.999, 24.0] < (24.0, 46.0] < (46.0, 62.6] < (62.6, 87.0] < (87.0, 99.0]]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quintiles = pd.qcut(results, 5)\n",
    "quintiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(62.6, 87.0]     4\n",
       "(2.999, 24.0]    4\n",
       "(87.0, 99.0]     3\n",
       "(46.0, 62.6]     3\n",
       "(24.0, 46.0]     3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.value_counts(quintiles)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Detecting and Filtering Outliers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-0.036163</td>\n",
       "      <td>0.037203</td>\n",
       "      <td>0.018722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.038703</td>\n",
       "      <td>0.986338</td>\n",
       "      <td>1.011587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-3.591217</td>\n",
       "      <td>-3.816239</td>\n",
       "      <td>-3.586733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.729458</td>\n",
       "      <td>-0.581396</td>\n",
       "      <td>-0.665261</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-0.025864</td>\n",
       "      <td>0.005155</td>\n",
       "      <td>-0.002774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.674396</td>\n",
       "      <td>0.706958</td>\n",
       "      <td>0.731404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.115554</td>\n",
       "      <td>2.899073</td>\n",
       "      <td>3.425400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0            1            2\n",
       "count  1000.000000  1000.000000  1000.000000\n",
       "mean     -0.036163     0.037203     0.018722\n",
       "std       1.038703     0.986338     1.011587\n",
       "min      -3.591217    -3.816239    -3.586733\n",
       "25%      -0.729458    -0.581396    -0.665261\n",
       "50%      -0.025864     0.005155    -0.002774\n",
       "75%       0.674396     0.706958     0.731404\n",
       "max       3.115554     2.899073     3.425400"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randframe = pd.DataFrame(np.random.randn(1000,3))\n",
    "randframe.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.038703\n",
       "1    0.986338\n",
       "2    1.011587\n",
       "dtype: float64"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randframe.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>-2.106846</td>\n",
       "      <td>-3.408329</td>\n",
       "      <td>-0.067435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>-3.591217</td>\n",
       "      <td>0.791474</td>\n",
       "      <td>0.243038</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>1.149396</td>\n",
       "      <td>-3.816239</td>\n",
       "      <td>0.328653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>717</th>\n",
       "      <td>0.434665</td>\n",
       "      <td>-1.248411</td>\n",
       "      <td>-3.586733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>726</th>\n",
       "      <td>1.682330</td>\n",
       "      <td>1.252479</td>\n",
       "      <td>-3.090042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>955</th>\n",
       "      <td>0.272374</td>\n",
       "      <td>2.224856</td>\n",
       "      <td>3.425400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2\n",
       "87  -2.106846 -3.408329 -0.067435\n",
       "129 -3.591217  0.791474  0.243038\n",
       "133  1.149396 -3.816239  0.328653\n",
       "717  0.434665 -1.248411 -3.586733\n",
       "726  1.682330  1.252479 -3.090042\n",
       "955  0.272374  2.224856  3.425400"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randframe[(np.abs(randframe) > (3*randframe.std())).any(1)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Permutation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>22</td>\n",
       "      <td>23</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "0   0   1   2   3   4\n",
       "1   5   6   7   8   9\n",
       "2  10  11  12  13  14\n",
       "3  15  16  17  18  19\n",
       "4  20  21  22  23  24"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nframe = pd.DataFrame(np.arange(25).reshape(5,5))\n",
    "nframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 3, 0, 4, 1])"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_order = np.random.permutation(5)\n",
    "new_order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>22</td>\n",
       "      <td>23</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "2  10  11  12  13  14\n",
       "3  15  16  17  18  19\n",
       "0   0   1   2   3   4\n",
       "4  20  21  22  23  24\n",
       "1   5   6   7   8   9"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nframe.take(new_order)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>22</td>\n",
       "      <td>23</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "3  15  16  17  18  19\n",
       "4  20  21  22  23  24\n",
       "2  10  11  12  13  14"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_order = [3,4,2]\n",
    "nframe.take(new_order)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 3, 3])"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample = np.random.randint(0, len(nframe), size=3)\n",
    "sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "3  15  16  17  18  19\n",
       "3  15  16  17  18  19\n",
       "3  15  16  17  18  19"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nframe.take(sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## String Manipulation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Built-in Methods for Manipulation of Strings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['16 Bolton Avenue ', ' Boston']"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text = '16 Bolton Avenue , Boston'\n",
    "text.split(',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['16 Bolton Avenue', 'Boston']"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokens = [s.strip() for s in text.split(',')]\n",
    "tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'16 Bolton Avenue'"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "address, city = [s.strip() for s in text.split(',')]\n",
    "address"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Boston'"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'16 Bolton Avenue,Boston'"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "address + ',' + city"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'A+;A;A-;B;BB;BBB;C+'"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "strings = ['A+', 'A', 'A-', 'B', 'BB', 'BBB', 'C+']\n",
    "';'.join(strings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'Boston' in text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text.index('Boston')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text.find('Boston')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "substring not found",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-86-e44f5210d36c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'New York'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m: substring not found"
     ]
    }
   ],
   "source": [
    "text.index('New York')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text.find('New York')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text.count('e')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text.count('Avenue')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'16 Bolton Street , Boston'"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text.replace('Avenue','Street')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'6 Bolton Avenue , Boston'"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text.replace('1','')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regular Expressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['This', 'is', 'an', 'odd', 'text!']"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text = \"This is      an\\t odd   \\n text!\"\n",
    "re.split('\\s+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "regex = re.compile('\\s+')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['This', 'is', 'an', 'odd', 'text!']"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "regex.split(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Avenue']"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text = 'This is my address: 16 Bolton Avenue, Boston'\n",
    "re.findall('A\\w+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['address', 'Avenue']"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "re.findall('[A,a]\\w+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(11, 18), match='address'>"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "re.search('[A,a]\\w+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "search = re.search('[A,a]\\w+', text)\n",
    "search.start()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "search.end()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'address'"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text[search.start():search.end()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "re.match('[A,a]\\w+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 4), match='This'>"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "re.match('T\\w+', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'This'"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "match = re.match('T\\w+', text)\n",
    "text[match.start():match.end()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Aggregation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### A Practical Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>object</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>pen</td>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>pencil</td>\n",
       "      <td>4.20</td>\n",
       "      <td>4.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>pencil</td>\n",
       "      <td>1.30</td>\n",
       "      <td>1.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>ashtray</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>green</td>\n",
       "      <td>pen</td>\n",
       "      <td>2.75</td>\n",
       "      <td>3.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color   object  price1  price2\n",
       "0  white      pen    5.56    4.75\n",
       "1    red   pencil    4.20    4.12\n",
       "2  green   pencil    1.30    1.60\n",
       "3    red  ashtray    0.56    0.75\n",
       "4  green      pen    2.75    3.15"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({ 'color': ['white','red','green','red','green'],\n",
    "                       'object': ['pen','pencil','pencil','ashtray','pen'],\n",
    "                       'price1': [5.56, 4.20, 1.30, 0.56, 2.75],\n",
    "                       'price2': [4.75,4.12,1.60,0.75,3.15]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.SeriesGroupBy object at 0x0000025216B020B8>"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group = frame['price1'].groupby(frame['color'])\n",
    "group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'green': Int64Index([2, 4], dtype='int64'),\n",
       " 'red': Int64Index([1, 3], dtype='int64'),\n",
       " 'white': Int64Index([0], dtype='int64')}"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    2.025\n",
       "red      2.380\n",
       "white    5.560\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    4.05\n",
       "red      4.76\n",
       "white    5.56\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hierarchical Grouping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{('green', 'pen'): Int64Index([4], dtype='int64'),\n",
       " ('green', 'pencil'): Int64Index([2], dtype='int64'),\n",
       " ('red', 'ashtray'): Int64Index([3], dtype='int64'),\n",
       " ('red', 'pencil'): Int64Index([1], dtype='int64'),\n",
       " ('white', 'pen'): Int64Index([0], dtype='int64')}"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ggroup = frame['price1'].groupby([frame['color'],frame['object']])\n",
    "ggroup.groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color  object \n",
       "green  pen        2.75\n",
       "       pencil     1.30\n",
       "red    ashtray    0.56\n",
       "       pencil     4.20\n",
       "white  pen        5.56\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ggroup.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>2.025</td>\n",
       "      <td>2.375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>2.380</td>\n",
       "      <td>2.435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>5.560</td>\n",
       "      <td>4.750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       price1  price2\n",
       "color                \n",
       "green   2.025   2.375\n",
       "red     2.380   2.435\n",
       "white   5.560   4.750"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame[['price1','price2']].groupby(frame['color']).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>2.025</td>\n",
       "      <td>2.375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>2.380</td>\n",
       "      <td>2.435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>5.560</td>\n",
       "      <td>4.750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       price1  price2\n",
       "color                \n",
       "green   2.025   2.375\n",
       "red     2.380   2.435\n",
       "white   5.560   4.750"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.groupby(frame['color']).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Group Iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "green\n",
      "   color  object  price1  price2\n",
      "2  green  pencil    1.30    1.60\n",
      "4  green     pen    2.75    3.15\n",
      "red\n",
      "  color   object  price1  price2\n",
      "1   red   pencil    4.20    4.12\n",
      "3   red  ashtray    0.56    0.75\n",
      "white\n",
      "   color object  price1  price2\n",
      "0  white    pen    5.56    4.75\n"
     ]
    }
   ],
   "source": [
    "for name, group in frame.groupby('color'):\n",
    "    print(name)\n",
    "    print(group)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Chain of Transformations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result1 = frame['price1'].groupby(frame['color']).mean()\n",
    "type(result1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result2 = frame.groupby(frame['color']).mean()\n",
    "type(result2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    2.025\n",
       "red      2.380\n",
       "white    5.560\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame['price1'].groupby(frame['color']).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    2.025\n",
       "red      2.380\n",
       "white    5.560\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.groupby(frame['color'])['price1'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    2.025\n",
       "red      2.380\n",
       "white    5.560\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(frame.groupby(frame['color']).mean())['price1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_price1</th>\n",
       "      <th>mean_price2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>2.025</td>\n",
       "      <td>2.375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>2.380</td>\n",
       "      <td>2.435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>5.560</td>\n",
       "      <td>4.750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       mean_price1  mean_price2\n",
       "color                          \n",
       "green        2.025        2.375\n",
       "red          2.380        2.435\n",
       "white        5.560        4.750"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means = frame.groupby('color').mean().add_prefix('mean_')\n",
    "means"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Functions on Groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    2.170\n",
       "red      2.744\n",
       "white    5.560\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group = frame.groupby('color')\n",
    "group['price1'].quantile(0.6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "color\n",
       "green    1.45\n",
       "red      3.64\n",
       "white    0.00\n",
       "Name: price1, dtype: float64"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def range(series):\n",
    "    return series.max() - series.min()\n",
    "group['price1'].agg(range)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>1.45</td>\n",
       "      <td>1.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>3.64</td>\n",
       "      <td>3.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       price1  price2\n",
       "color                \n",
       "green    1.45    1.55\n",
       "red      3.64    3.37\n",
       "white    0.00    0.00"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.agg(range)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>range</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>2.025</td>\n",
       "      <td>1.025305</td>\n",
       "      <td>1.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>2.380</td>\n",
       "      <td>2.573869</td>\n",
       "      <td>3.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>5.560</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        mean       std  range\n",
       "color                        \n",
       "green  2.025  1.025305   1.45\n",
       "red    2.380  2.573869   3.64\n",
       "white  5.560       NaN   0.00"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group['price1'].agg(['mean','std',range])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Data Aggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>4.20</td>\n",
       "      <td>4.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>1.30</td>\n",
       "      <td>1.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>green</td>\n",
       "      <td>2.75</td>\n",
       "      <td>3.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  price1  price2\n",
       "0  white    5.56    4.75\n",
       "1    red    4.20    4.12\n",
       "2  green    1.30    1.60\n",
       "3    red    0.56    0.75\n",
       "4  green    2.75    3.15"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({ 'color': ['white','red','green','red','green'],\n",
    "                       'price1': [5.56, 4.20, 1.30, 0.56, 2.75],\n",
    "                       'price2': [4.75,4.12,1.60,0.75,3.15]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tot_price1</th>\n",
       "      <th>tot_price2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>green</th>\n",
       "      <td>4.05</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>red</th>\n",
       "      <td>4.76</td>\n",
       "      <td>4.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>white</th>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       tot_price1  tot_price2\n",
       "color                        \n",
       "green        4.05        4.75\n",
       "red          4.76        4.87\n",
       "white        5.56        4.75"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sums = frame.groupby('color').sum().add_prefix('tot_')\n",
    "sums"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "      <th>tot_price1</th>\n",
       "      <th>tot_price2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>red</td>\n",
       "      <td>4.20</td>\n",
       "      <td>4.12</td>\n",
       "      <td>4.76</td>\n",
       "      <td>4.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>red</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.75</td>\n",
       "      <td>4.76</td>\n",
       "      <td>4.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>green</td>\n",
       "      <td>1.30</td>\n",
       "      <td>1.60</td>\n",
       "      <td>4.05</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>green</td>\n",
       "      <td>2.75</td>\n",
       "      <td>3.15</td>\n",
       "      <td>4.05</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  price1  price2  tot_price1  tot_price2\n",
       "0  white    5.56    4.75        5.56        4.75\n",
       "1    red    4.20    4.12        4.76        4.87\n",
       "3    red    0.56    0.75        4.76        4.87\n",
       "2  green    1.30    1.60        4.05        4.75\n",
       "4  green    2.75    3.15        4.05        4.75"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(frame, sums, left_on='color', right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tot_price1</th>\n",
       "      <th>tot_price2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.56</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.76</td>\n",
       "      <td>4.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.05</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.76</td>\n",
       "      <td>4.87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.05</td>\n",
       "      <td>4.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   tot_price1  tot_price2\n",
       "0        5.56        4.75\n",
       "1        4.76        4.87\n",
       "2        4.05        4.75\n",
       "3        4.76        4.87\n",
       "4        4.05        4.75"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.groupby('color').transform(np.sum).add_prefix('tot_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "      <th>status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>white</td>\n",
       "      <td>12.33</td>\n",
       "      <td>11.23</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>black</td>\n",
       "      <td>14.55</td>\n",
       "      <td>31.80</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>white</td>\n",
       "      <td>22.34</td>\n",
       "      <td>29.99</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>white</td>\n",
       "      <td>27.84</td>\n",
       "      <td>31.18</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>black</td>\n",
       "      <td>23.40</td>\n",
       "      <td>18.25</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>black</td>\n",
       "      <td>18.33</td>\n",
       "      <td>22.44</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   color  price1  price2 status\n",
       "0  white   12.33   11.23     up\n",
       "1  black   14.55   31.80     up\n",
       "2  white   22.34   29.99   down\n",
       "3  white   27.84   31.18   down\n",
       "4  black   23.40   18.25   down\n",
       "5  black   18.33   22.44     up"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({ 'color': ['white','black','white','white','black','black'],\n",
    "                       'status': ['up','up','down','down','down','up'],\n",
    "                       'price1': [12.33,14.55,22.34,27.84,23.40,18.33],\n",
    "                       'price2': [11.23,31.80,29.99,31.18,18.25,22.44]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "      <th>status</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>color</th>\n",
       "      <th>status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">black</th>\n",
       "      <th>down</th>\n",
       "      <td>black</td>\n",
       "      <td>23.40</td>\n",
       "      <td>18.25</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>up</th>\n",
       "      <td>black</td>\n",
       "      <td>18.33</td>\n",
       "      <td>31.80</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">white</th>\n",
       "      <th>down</th>\n",
       "      <td>white</td>\n",
       "      <td>27.84</td>\n",
       "      <td>31.18</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>up</th>\n",
       "      <td>white</td>\n",
       "      <td>12.33</td>\n",
       "      <td>11.23</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              color  price1  price2 status\n",
       "color status                              \n",
       "black down    black   23.40   18.25   down\n",
       "      up      black   18.33   31.80     up\n",
       "white down    white   27.84   31.18   down\n",
       "      up      white   12.33   11.23     up"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.groupby(['color','status']).apply( lambda x: x.max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>price1</th>\n",
       "      <th>price2</th>\n",
       "      <th>status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>white</td>\n",
       "      <td>12.33</td>\n",
       "      <td>11.23</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>black</td>\n",
       "      <td>14.55</td>\n",
       "      <td>31.80</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>white</td>\n",
       "      <td>22.34</td>\n",
       "      <td>29.99</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>white</td>\n",
       "      <td>27.84</td>\n",
       "      <td>31.18</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fifth</th>\n",
       "      <td>black</td>\n",
       "      <td>23.40</td>\n",
       "      <td>18.25</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>black</td>\n",
       "      <td>18.33</td>\n",
       "      <td>22.44</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        color  price1  price2 status\n",
       "first   white   12.33   11.23     up\n",
       "second  black   14.55   31.80     up\n",
       "third   white   22.34   29.99   down\n",
       "fourth  white   27.84   31.18   down\n",
       "fifth   black   23.40   18.25   down\n",
       "5       black   18.33   22.44     up"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.rename(index=reindex, columns=recolumn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 01:00:00',\n",
       "               '2015-01-01 02:00:00', '2015-01-01 03:00:00',\n",
       "               '2015-01-01 04:00:00', '2015-01-01 05:00:00',\n",
       "               '2015-01-01 06:00:00', '2015-01-01 07:00:00',\n",
       "               '2015-01-01 08:00:00', '2015-01-01 09:00:00'],\n",
       "              dtype='datetime64[ns]', freq='H')"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp = pd.date_range('1/1/2015', periods=10, freq= 'H')\n",
    "temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2015-01-01 00:00:00    0.317051\n",
       "2015-01-01 01:00:00    0.628468\n",
       "2015-01-01 02:00:00    0.829405\n",
       "2015-01-01 03:00:00    0.792059\n",
       "2015-01-01 04:00:00    0.486475\n",
       "2015-01-01 05:00:00    0.707027\n",
       "2015-01-01 06:00:00    0.293156\n",
       "2015-01-01 07:00:00    0.091072\n",
       "2015-01-01 08:00:00    0.146105\n",
       "2015-01-01 09:00:00    0.500388\n",
       "Freq: H, dtype: float64"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timeseries = pd.Series(np.random.rand(10), index=temp)\n",
    "timeseries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>value1</th>\n",
       "      <th>value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-01-01 00:00:00</td>\n",
       "      <td>0.125229</td>\n",
       "      <td>0.995517</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-01-01 01:00:00</td>\n",
       "      <td>0.597289</td>\n",
       "      <td>0.160828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-01-01 02:00:00</td>\n",
       "      <td>0.231104</td>\n",
       "      <td>0.076982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-01-01 03:00:00</td>\n",
       "      <td>0.862940</td>\n",
       "      <td>0.270581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2015-01-01 04:00:00</td>\n",
       "      <td>0.534056</td>\n",
       "      <td>0.306486</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2015-01-01 05:00:00</td>\n",
       "      <td>0.162040</td>\n",
       "      <td>0.979835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2015-01-01 06:00:00</td>\n",
       "      <td>0.400413</td>\n",
       "      <td>0.486397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2015-01-01 07:00:00</td>\n",
       "      <td>0.157052</td>\n",
       "      <td>0.246959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2015-01-01 08:00:00</td>\n",
       "      <td>0.835632</td>\n",
       "      <td>0.572664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2015-01-01 09:00:00</td>\n",
       "      <td>0.812283</td>\n",
       "      <td>0.388435</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date    value1    value2\n",
       "0 2015-01-01 00:00:00  0.125229  0.995517\n",
       "1 2015-01-01 01:00:00  0.597289  0.160828\n",
       "2 2015-01-01 02:00:00  0.231104  0.076982\n",
       "3 2015-01-01 03:00:00  0.862940  0.270581\n",
       "4 2015-01-01 04:00:00  0.534056  0.306486\n",
       "5 2015-01-01 05:00:00  0.162040  0.979835\n",
       "6 2015-01-01 06:00:00  0.400413  0.486397\n",
       "7 2015-01-01 07:00:00  0.157052  0.246959\n",
       "8 2015-01-01 08:00:00  0.835632  0.572664\n",
       "9 2015-01-01 09:00:00  0.812283  0.388435"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timetable = pd.DataFrame( {'date': temp, 'value1': np.random.rand(10),\n",
    "                                      'value2': np.random.rand(10)})\n",
    "timetable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>value1</th>\n",
       "      <th>value2</th>\n",
       "      <th>cat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-01-01 00:00:00</td>\n",
       "      <td>0.125229</td>\n",
       "      <td>0.995517</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-01-01 01:00:00</td>\n",
       "      <td>0.597289</td>\n",
       "      <td>0.160828</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-01-01 02:00:00</td>\n",
       "      <td>0.231104</td>\n",
       "      <td>0.076982</td>\n",
       "      <td>left</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-01-01 03:00:00</td>\n",
       "      <td>0.862940</td>\n",
       "      <td>0.270581</td>\n",
       "      <td>left</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2015-01-01 04:00:00</td>\n",
       "      <td>0.534056</td>\n",
       "      <td>0.306486</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2015-01-01 05:00:00</td>\n",
       "      <td>0.162040</td>\n",
       "      <td>0.979835</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2015-01-01 06:00:00</td>\n",
       "      <td>0.400413</td>\n",
       "      <td>0.486397</td>\n",
       "      <td>down</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2015-01-01 07:00:00</td>\n",
       "      <td>0.157052</td>\n",
       "      <td>0.246959</td>\n",
       "      <td>right</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2015-01-01 08:00:00</td>\n",
       "      <td>0.835632</td>\n",
       "      <td>0.572664</td>\n",
       "      <td>right</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2015-01-01 09:00:00</td>\n",
       "      <td>0.812283</td>\n",
       "      <td>0.388435</td>\n",
       "      <td>up</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date    value1    value2    cat\n",
       "0 2015-01-01 00:00:00  0.125229  0.995517     up\n",
       "1 2015-01-01 01:00:00  0.597289  0.160828   down\n",
       "2 2015-01-01 02:00:00  0.231104  0.076982   left\n",
       "3 2015-01-01 03:00:00  0.862940  0.270581   left\n",
       "4 2015-01-01 04:00:00  0.534056  0.306486     up\n",
       "5 2015-01-01 05:00:00  0.162040  0.979835     up\n",
       "6 2015-01-01 06:00:00  0.400413  0.486397   down\n",
       "7 2015-01-01 07:00:00  0.157052  0.246959  right\n",
       "8 2015-01-01 08:00:00  0.835632  0.572664  right\n",
       "9 2015-01-01 09:00:00  0.812283  0.388435     up"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timetable['cat'] = ['up','down','left','left','up','up','down','right','right','up']\n",
    "timetable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
